********************************************************************************
*Project:  	Swiss Trade During the Covid-19 Pandemic: An Early Appraisal	
*Authors:	KB, SL, VP, PW											
*This File: Clean Classifications on Country Codes of UNCTAD
*Version: 	vFinal						
*Date: 		2020-09-01						
********************************************************************************

********************************************************************************
*1. Import & Clean Main UNCTAD-Classifications-Files
*Source: https://unctadstat.unctad.org/en/classifications.html
********************************************************************************
*ISO-CODES 3166/ALPHA-2
import excel "${root}\data\UNCTAD_Classifications\countries_iso_codes.xls", sheet("Transcodes_EN") clear

drop F G

rename A land_iso2
rename B land_iso3
rename C land_num
rename D land_str
rename E land_num_UNCTAD

drop if land_num==""
drop if land_num=="Num."

destring land_num, replace force
destring land_num_UNCTAD, replace force

compare land_num land_num_UNCTAD
gen tag=1 if land_num!=land_num_UNCTAD
*br if tag==1
drop tag land_num /*Small differences to land_num UNCTAD; later is used in other files*/
rename land_num_UNCTAD land_num

sort land_num
duplicates tag land_num, gen(dup)

*Replace identifier of duplicates
*Finland
replace land_num=246001 if dup>0 & land_iso2=="AX" & land_num==246
*France
replace land_num=251001 if dup>0 & land_iso2=="GP" & land_num==251
replace land_num=251002 if dup>0 & land_iso2=="YT" & land_num==251
replace land_num=251003 if dup>0 & land_iso2=="RE" & land_num==251
replace land_num=251004 if dup>0 & land_iso2=="MQ" & land_num==251
replace land_num=251005 if dup>0 & land_iso2=="MC" & land_num==251
replace land_num=251006 if dup>0 & land_iso2=="GF" & land_num==251
*Norway
replace land_num=579001 if dup>0 & land_iso2=="SJ" & land_num==579
*Switzerland
replace land_num=757001 if dup>0 & land_iso2=="LI" & land_num==757
*USA
replace land_num=842001 if dup>0 & land_iso2=="PR" & land_num==842
replace land_num=842002 if dup>0 & land_iso2=="VI" & land_num==842
*UK
replace land_num=926001 if dup>0 & land_iso2=="GG" & land_num==926
replace land_num=926002 if dup>0 & land_iso2=="JE" & land_num==926
replace land_num=926003 if dup>0 & land_iso2=="IM" & land_num==926
drop dup

order land_num land_str land_iso2 land_iso3
compress
saveold "${root}\temp\UNCTAD_iso2_iso3.dta", version(13) replace 

*TRADE GROUPS
import excel "${root}\data\UNCTAD_Classifications\countries_trade_groups.xls", sheet("DimCountries_TradeGroups_Hierar") clear case(lower)

drop E F G A B

drop if D==""

rename C land_id_num
rename D land_str

drop if land_str=="Label"

destring land_id_num, replace

clonevar id_x=land_id_num

replace id_x =. if id_x<1000
replace id_x = id_x[_n-1] if id_x==.

gen str_x=land_str if id_x==land_id_num
replace str_x = str_x[_n-1] if str_x==""

drop if land_id_num==id_x

gen str_x_short=""
split str_x, parse (":")
replace str_x_short=str_x1 if str_x2!=""
drop str_x1 str_x2

split str_x, parse ("(")
replace str_x_short=str_x1 if str_x2!="" & str_x_short==""
drop str_x1 str_x2

replace str_x_short=str_x if str_x_short==""
replace str_x_short=stritrim(strltrim(strrtrim(str_x_short)))
replace str_x_short="EU2020" if str_x_short=="European Union"
replace str_x_short="Eurozone" if str_x_short=="Euro area"

*KEEP EU & NAFTA VARIABLES
keep if inlist(str_x_short, "EU15", "EU25", "EU27", "EU28", "Eurozone", "EU2020", "NAFTA", "EFTA")

keep land_id_num land_str str_x str_x_short
rename str_x_short land_trade_group_short
rename str_x land_trade_group 

levelsof land_trade_group_short, local(ltg)
foreach g of local ltg {
	gen `g'=0
	replace `g'=1 if land_trade_group_short=="`g'"
}

collapse (sum) `ltg', by(land_id_num land_str)

gen EU_EFTA_2020=0
replace EU_EFTA_2020=1 if EU2020==1 | EFTA==1

rename land_id_num land_num
order land_num land_str EU2020 Eurozone NAFTA EU15 EU25 EU27 EU28 EFTA EU_EFTA_2020
compress
saveold "${root}\temp\UNCTAD_trade_groups_EU_EURO_NAFTA.dta", version(13) replace

*CONTINENTS
import excel "${root}\data\UNCTAD_Classifications\countries_geographical_groups.xls", sheet("DimCountries_Geographics_Hierar") clear

drop E F G A B

drop if D==""

rename C land_id_num
rename D land_str

drop if land_str=="Label"

destring land_id_num, replace
clonevar id_x=land_id_num

replace id_x =. if id_x<1000
replace id_x = id_x[_n-1] if id_x==.

gen id_continents=.
replace id_continents=id_x if inlist(id_x, 5100, 5200, 5300, 5400, 5500)
replace id_continents = id_continents[_n-1] if id_continents==.

gen str_continents6=""
replace str_continents6="Africa" if inlist(id_continents, 5100)
replace str_continents6="America" if inlist(id_continents, 5200)
replace str_continents6="Asia" if inlist(id_continents, 5300)
replace str_continents6="Europe" if inlist(id_continents, 5400)
replace str_continents6="Oceania" if inlist(id_continents, 5500)

gen str_continents7=""
replace str_continents7="Africa" if inlist(id_continents, 5100)
replace str_continents7="Latin America" if id_x>=5220 & id_x<=5223
replace str_continents7="Northern America" if id_x==5210
replace str_continents7="Asia" if inlist(id_continents, 5300)
replace str_continents7="Europe" if inlist(id_continents, 5400)
replace str_continents7="Oceania" if inlist(id_continents, 5500)

drop if land_id_num>999

drop id_x id_continents

rename land_id_num land_num
order land_num land_num str_continents6 str_continents7
compress
saveold "${root}\temp\UNCTAD_continents.dta",  version(13) replace

********************************************************************************
*2. Merge UNCTAD-Classifications-Files to single file 
********************************************************************************

use "${root}\temp\UNCTAD_iso2_iso3.dta", clear

*TRADE GROUPS
*************
merge 1:1 land_num using "${root}\temp\UNCTAD_trade_groups_EU_EURO_NAFTA.dta"
drop if _merge==2 /*DDR, Westdeutschland, Czechoslovakia*/
rename _merge merge_trade_groups

foreach var in EU2020 Eurozone NAFTA EU15 EU25 EU27 EU28 EFTA EU_EFTA_2020 {
	replace `var'=0 if `var'==.
}

*GEOGRAPHIC GROUPS
******************
merge 1:1 land_num using "${root}\temp\UNCTAD_continents.dta"
drop if _merge==2
rename _merge merge_geopraphic_groups

rename land_num land_UNCTAD

rename str_continents* land_region*

order land_UNCTAD land_str land_iso2 land_iso3									///
land_region6 land_region7 EU2020 Eurozone NAFTA EU25 EU27 EU28			
compress
saveold "${root}\temp\UNCTAD_country_groups_raw.dta",  version(13) replace
